{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Couldn't import dot_parser, loading of dot files will not be possible.\n"
     ]
    }
   ],
   "source": [
    "from __future__ import print_function, division\n",
    "from functools import partial\n",
    "from StringIO import StringIO\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import theano\n",
    "import theano.tensor as T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "### utils\n",
    "def sfloatX(data):\n",
    "    \"\"\"Convert scalar to floatX\"\"\"\n",
    "    return getattr(np, theano.config.floatX)(data)\n",
    "\n",
    "\n",
    "def none_to_dict(data):\n",
    "    return {} if data is None else data\n",
    "\n",
    "\n",
    "def ndim_tensor(name, ndim, dtype=theano.config.floatX):\n",
    "    tensor_type = T.TensorType(dtype=dtype, broadcastable=((False,) * ndim))\n",
    "    return tensor_type(name=name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from lasagne.layers.helper import get_all_layers, get_all_params, get_output\n",
    "\n",
    "class Net(object):\n",
    "    def __init__(self, output_layer):\n",
    "        self.layers = get_all_layers(output_layer)\n",
    "        self._deterministic_output_func = None\n",
    "        \n",
    "    @property\n",
    "    def deterministic_output_func(self):\n",
    "        if self._deterministic_output_func is None:\n",
    "            self._deterministic_output_func = self._compile_deterministic_output_func()\n",
    "        return self._deterministic_output_func\n",
    "        \n",
    "    def _compile_deterministic_output_func(self):\n",
    "        network_input = self.get_symbolic_input()\n",
    "        deterministic_output = self.get_symbolic_output(deterministic=True)\n",
    "        net_output_func = theano.function(\n",
    "            inputs=[network_input],\n",
    "            outputs=deterministic_output,\n",
    "            on_unused_input='warn',\n",
    "            allow_input_downcast=True\n",
    "        )\n",
    "        return net_output_func\n",
    "   \n",
    "    def get_symbolic_input(self):\n",
    "        network_input = self.layers[0].input_var\n",
    "        return network_input\n",
    "    \n",
    "    def get_symbolic_output(self, deterministic=False):\n",
    "        network_input = self.get_symbolic_input()\n",
    "        network_output = get_output(\n",
    "            self.layers[-1], network_input, deterministic=deterministic)\n",
    "        return network_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from lasagne.updates import nesterov_momentum\n",
    "from lasagne.objectives import aggregate, squared_error\n",
    "\n",
    "\n",
    "class Trainer(object):\n",
    "    def __init__(self, net, data_pipeline,\n",
    "                 loss_func=squared_error, \n",
    "                 loss_aggregation_mode='mean',\n",
    "                 updates_func=partial(nesterov_momentum, momentum=0.9),\n",
    "                 learning_rate=1e-2,\n",
    "                 callbacks=None,\n",
    "                 repeat_callbacks=None):\n",
    "        self.net = net\n",
    "        self.data_pipeline = data_pipeline\n",
    "        self.validation_batch = self.data_pipeline.get_batch(validation=True)\n",
    "        def _loss_func(prediction, target):\n",
    "            loss = loss_func(prediction, target)\n",
    "            return aggregate(loss, mode=loss_aggregation_mode)\n",
    "        self.loss_func = _loss_func\n",
    "        self.updates_func = updates_func\n",
    "        self._learning_rate = theano.shared(sfloatX(learning_rate), name='learning_rate')\n",
    "        \n",
    "        # Callbacks\n",
    "        def callbacks_dataframe(lst):\n",
    "            return pd.DataFrame(lst, columns=['iteration', 'function'])\n",
    "        self.callbacks = callbacks_dataframe(callbacks)\n",
    "        self.repeat_callbacks = callbacks_dataframe(repeat_callbacks)\n",
    "        \n",
    "        # Training and validation state\n",
    "        self._train_func = None\n",
    "        self._validation_cost_func = None\n",
    "        self.training_costs = []\n",
    "        self.validation_costs = []\n",
    "        self.iteration = 0\n",
    "        \n",
    "    @property\n",
    "    def train_func(self):\n",
    "        if self._train_func is None:\n",
    "            self._trian_func = self._compile_train_func()\n",
    "        return self._train_func\n",
    "    \n",
    "    @property\n",
    "    def validation_cost_func(self):\n",
    "        if self._validation_cost_func is None:\n",
    "            self._validation_cost_func = self._compile_validation_cost_func()\n",
    "        return self._validation_cost_func\n",
    "        \n",
    "    def _compile_train_func(self):\n",
    "        network_input = self.net.get_symbolic_input()\n",
    "        train_output = self.net.get_symbolic_output(deterministic=False)\n",
    "        target_var = ndim_tensor(name='target', ndim=train_output.ndim)\n",
    "        train_loss = self.loss_func(train_output, target_var)\n",
    "        all_params = get_all_params(self.layers[-1], trainable=True)\n",
    "        updates = self.updates_func(train_loss, all_params, learning_rate=self._learning_rate)\n",
    "        train_func = theano.function(\n",
    "            inputs=[network_input, target_var],\n",
    "            outputs=train_loss,\n",
    "            updates=updates,\n",
    "            on_unused_input='warn',\n",
    "            allow_input_downcast=True)\n",
    "        return train_func\n",
    "    \n",
    "    def _compile_validation_cost_func(self):\n",
    "        network_input = self.net.get_symbolic_input()\n",
    "        deterministic_output = self.net.get_symbolic_output(deterministic=True)\n",
    "        validation_loss = self.loss_func(deterministic_output, target_var)\n",
    "        validation_cost_func = theano.function(\n",
    "            inputs=[network_input, target_var],\n",
    "            outputs=[validation_loss, deterministic_output],\n",
    "            on_unused_input='warn',\n",
    "            allow_input_downcast=True)\n",
    "        return validation_cost_func\n",
    "\n",
    "    @property\n",
    "    def learning_rate(self):\n",
    "        return self._learning_rate.get_value()\n",
    "\n",
    "    @learning_rate.setter\n",
    "    def learning_rate(self, rate):\n",
    "        rate = sfloatX(rate)\n",
    "        self.logger.info(\n",
    "            \"Iteration {:d}: Change learning rate to {:.1E}\"\n",
    "            .format(self.n_iterations(), rate))\n",
    "        self._learning_rate.set_value(rate)\n",
    "        \n",
    "    def fit(self, num_iterations=None):\n",
    "        while self.iteration != num_iterations:\n",
    "            self.iteration = len(training_costs)\n",
    "                    \n",
    "            # Training\n",
    "            self.train_batch = self.data_pipeline.get_batch()\n",
    "            train_cost = self.train_func(self.train_batch.input, self.train_batch.target)\n",
    "            self.training_costs.append(train_cost.flatten()[0])\n",
    "            \n",
    "            # Callbacks\n",
    "            def run_callbacks(df):\n",
    "                for callback in df['function']:\n",
    "                    callback(self)\n",
    "            repeat_callbacks = self.repeat_callbacks[(iteration % self.repeat_callbacks['iteration']) == 0]\n",
    "            run_callbacks(repeat_callbacks)\n",
    "            callbacks = self.callbacks[self.callbacks['iteration'] == iteration]\n",
    "            run_callbacks(callbacks)\n",
    "            \n",
    "    def validate(self):\n",
    "        pass\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>on_power_threshold</th>\n",
       "      <th>max_power</th>\n",
       "      <th>min_on_duration</th>\n",
       "      <th>min_off_duration</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>kettle</th>\n",
       "      <td>10</td>\n",
       "      <td>2500</td>\n",
       "      <td>20</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>toaster</th>\n",
       "      <td>100</td>\n",
       "      <td>2000</td>\n",
       "      <td>20</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                on_power_threshold   max_power   min_on_duration  \\\n",
       "kettle                          10        2500                20   \n",
       "toaster                        100        2000                20   \n",
       "\n",
       "          min_off_duration  \n",
       "kettle                  30  \n",
       "toaster                 30  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "APPLIANCE_PARAMS_CSV = StringIO(\"\"\"\n",
    "       on_power_threshold, max_power, min_on_duration, min_off_duration\n",
    "kettle,                10,      2500,              20,               30\n",
    "toaster,              100,      2000,              20,               30\n",
    "\"\"\")\n",
    "\n",
    "APPLIANCE_PARAMS = pd.read_csv(APPLIANCE_PARAMS_CSV, index_col=0)\n",
    "APPLIANCE_PARAMS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from lasagne.layers import InputLayer, RecurrentLayer, DenseLayer, ReshapeLayer\n",
    "\n",
    "def get_net_0(input_shape, target_shape=None):\n",
    "    NUM_UNITS = {'dense_layer_0': 100}\n",
    "    if target_shape is None:\n",
    "        target_shape = input_shape\n",
    "    \n",
    "    # Define layers\n",
    "    input_layer = InputLayer(\n",
    "        shape=input_shape\n",
    "    )\n",
    "    dense_layer_0 = DenseLayer(\n",
    "        input_layer, \n",
    "        num_units=NUM_UNITS['dense_layer_0']\n",
    "    )\n",
    "    final_dense_layer = DenseLayer(\n",
    "        dense_layer_0,\n",
    "        num_units=target_shape[1] * target_shape[2]\n",
    "    )\n",
    "    output_layer = ReshapeLayer(\n",
    "        final_dense_layer,\n",
    "        shape=target_shape\n",
    "    )\n",
    "    return output_layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "output_layer = get_net_0((4, 16, 1))\n",
    "net = Net(output_layer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class Disaggregator(object):\n",
    "    def __init__(self, mains, output_path, stride=1):\n",
    "        self.mains = mains\n",
    "        self.stride = stride\n",
    "        self.output_path = output_path\n",
    "        seq_length = data_pipeline.seq_length\n",
    "        num_seq_per_batch = data_pipeline.num_seq_per_batch\n",
    "    \n",
    "    def _get_mains_data_pipeline(self, trainer):\n",
    "        data_pipeline = trainer.data_pipeline\n",
    "        mains_source = MainsSource(\n",
    "            self.mains,\n",
    "            seq_length=data_pipeline.source.seq_length,\n",
    "            stride=self.stride\n",
    "        )\n",
    "        mains_data_pipeline = DataPipeline(\n",
    "            mains_source,\n",
    "            num_seq_per_batch=data_pipeline.num_seq_per_batch,\n",
    "            input_processsing=data_pipeline.input_processing\n",
    "        )\n",
    "        return mains_data_pipeline\n",
    "    \n",
    "    def disaggregate(self, trainer):\n",
    "        net = trainer.net\n",
    "        mains_data_pipeline = self._get_mains_data_pipeline(trainer)\n",
    "        while True:\n",
    "            try:\n",
    "                batch = mains_data_pipeline.get_batch()\n",
    "            except:\n",
    "                break\n",
    "            output_for_batch = net.deterministic_output_func(batch.input)\n",
    "            output_for_batch = mains_data_pipeline.convert_to_watts(output_for_batch)\n",
    "            # TODO:\n",
    "            # merge overlapping segments\n",
    "            # save disag output to self.output_path\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plotter = Plotter()\n",
    "disaggregator = Disaggregator()\n",
    "\n",
    "\n",
    "trainer = Trainer(\n",
    "    net,\n",
    "    data_pipeline=None,\n",
    "    repeat_callbacks=[\n",
    "        (  10, Trainer.validate), \n",
    "        (1000, plotter.plot),\n",
    "        (1000, disaggregator.disaggregate),\n",
    "        (1000, disag_metrics.run_metrics)\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_data_pipeline_0():\n",
    "    APPLIANCES = ['fridge', 'kettle', 'toaster']\n",
    "    TARGET_APPLIANCE = 'kettle'\n",
    "    NILMTK_FILENAME = '/data/mine/vadeec/merged/ukdale.h5'\n",
    "    SEQ_LENGTH = 1024\n",
    "    CONFIG = {\n",
    "        'nilmtk_activations': {\n",
    "            'appliances': APPLIANCES,\n",
    "            'filename': NILMTK_FILENAME,\n",
    "            'window_per_building': {},\n",
    "            'buildings': [1]\n",
    "        },\n",
    "        'tracebase_activations': {\n",
    "            'appliances': APPLIANCES,\n",
    "            'path': '/data/tracebase'\n",
    "        }\n",
    "    }\n",
    "\n",
    "    nilmtk_activations = load_nilmtk_activations(**CONFIG['nilmtk_activations'])    \n",
    "    tracebase_activations = load_tracebase_activations(**CONFIG['tracebase_activations'])\n",
    "    \n",
    "    # These activations are dicts of the form:\n",
    "    # {'kettle': {'UK-DALE_house_1': [], 'TraceBase_house_1': []}}\n",
    "    \n",
    "    all_activations = merge_activations(nilmtk_activations, tracebase_activations)\n",
    "    \n",
    "    synthetic_source = SyntheticAggregateSource(\n",
    "        all_activations, \n",
    "        target_appliance=TARGET_APPLIANCE,\n",
    "        uniform_prob_of_selecting_each_building=True,\n",
    "        seq_length=SEQ_LENGTH\n",
    "    )\n",
    "    \n",
    "    # Real data\n",
    "    real_source = NILMTKSource(\n",
    "        filename=NILMTK_FILENAME,\n",
    "        target_appliance=TARGET_APPLIANCE,\n",
    "        train_window_per_building={},\n",
    "        train_building=[1, 2, 3],\n",
    "        validation_buildings=[5],\n",
    "        seq_length=SEQ_LENGTH\n",
    "    )\n",
    "    \n",
    "    # Get standard deviation of input\n",
    "    sample_sequences = []\n",
    "    NUM_SEQ_TO_SAMPLE = 512\n",
    "    for i in range(NUM_SEQ_TO_SAMPLE):\n",
    "        sample_sequences.append(real_source.get_data().flatten())\n",
    "    sample = np.concatenate(sample_sequences)\n",
    "    del sample_sequences\n",
    "    input_std = sample.std()\n",
    "    del sample\n",
    "    \n",
    "    data_pipeline = DataPipeline(\n",
    "        sources=[synthetic_source, real_source],\n",
    "        train_probs=[0.5, 0.5],\n",
    "        validation_probs=[0.0, 1.0],\n",
    "        num_seq_per_batch=64,\n",
    "        input_processing=[\n",
    "            DivideBy(input_std), \n",
    "            Downsample(3),\n",
    "            IndepdendentlyCentre()\n",
    "        ],\n",
    "        target_processing=[\n",
    "            DivideBy(APPLIANCE_PARAMS['max_power'][TARGET_APPLIANCE])\n",
    "        ]\n",
    "    )\n",
    "    \n",
    "    data_pipeline_thread = DataPipeLineThread(data_pipeline)\n",
    "    data_pipeline_thread.start()\n",
    "    return data_pipeline_thread\n",
    "\n",
    "\n",
    "experiments = [\n",
    "        {\n",
    "            'data_pipeline': get_data_pipeline_0,\n",
    "            'net': get_net_0\n",
    "        }\n",
    "    ]\n",
    "\n",
    "def run_experiments():\n",
    "    for experiment in experiments:\n",
    "        data_pipeline = experiment['data_pipeline']()\n",
    "        validation_data = pipeline.get_data(validation=True)\n",
    "        input_shape = validation_data.shape\n",
    "        \n",
    "        output_layer = experiment['net'](input_shape)\n",
    "        net = Net(output_layer)\n",
    "        \n",
    "        trainer = Trainer(\n",
    "            net=net,\n",
    "            data_pipeline=data_pipeline,\n",
    "            loss_function=squared_error,\n",
    "            repeat_callbacks={100: [plotter.plot_estimates, save_parameters, PlotTSNE(layers=[2])]}\n",
    "        )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
